knitr::opts_chunk$set(echo = TRUE)
library(readr)
library(dplyr)
library(tidyr)
library(ggplot2)
library(scales)
```r
# Install the text-mining dependencies used below, but only those that are not
# already available, so re-running the notebook does not reinstall everything.
# Package names must be quoted strings.
required_pkgs <- c(
  "tidytext", "textstem", "clinspacy",
  "topicmodels", "reshape2", "stringr"
)
missing_pkgs <- required_pkgs[
  !vapply(required_pkgs, requireNamespace, logical(1), quietly = TRUE)
]
if (length(missing_pkgs) > 0) {
  install.packages(missing_pkgs)
}
<!-- rnb-source-end -->
<!-- rnb-chunk-end -->
<!-- rnb-text-begin -->
<!-- rnb-text-end -->
<!-- rnb-chunk-begin -->
<!-- rnb-source-begin eyJkYXRhIjoiYGBgclxubGlicmFyeSh0aWR5dGV4dClcbmxpYnJhcnkodGV4dHN0ZW0pXG5saWJyYXJ5KGNsaW5zcGFjeSlcbmxpYnJhcnkodG9waWNtb2RlbHMpXG5saWJyYXJ5KHJlc2hhcGUyKVxubGlicmFyeShzdHJpbmdyKVxuYGBgIn0= -->
```r
library(tidytext)
library(textstem)
library(clinspacy)
library(topicmodels)
library(reshape2)
library(stringr)
###Data Parsing
# Load the MTSamples data set shipped with clinspacy and print a compact
# column-by-column overview of it.
raw.data <- clinspacy::dataset_mtsamples()
raw.data %>%
  dplyr::glimpse()
Rows: 4,999
Columns: 6
$ note_id <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, …
$ description <chr> "A 23-year-old white female presents with complaint of allergies.", "Consult for laparoscopic g…
$ medical_specialty <chr> "Allergy / Immunology", "Bariatrics", "Bariatrics", "Cardiovascular / Pulmonary", "Cardiovascul…
$ sample_name <chr> "Allergic Rhinitis", "Laparoscopic Gastric Bypass Consult - 2", "Laparoscopic Gastric Bypass Co…
$ transcription <chr> "SUBJECTIVE:, This 23-year-old white female presents with complaint of allergies. She used to…
$ keywords <chr> "allergy / immunology, allergic rhinitis, allergies, asthma, nasal sprays, rhinitis, nasal, ery…
1
###Data Description
##NoteID - This variable is the unique ID for each note.
##Description - This output provides a summary of the SOAP notes.
##Medical_Specialty - This variable refers to the medical speciality
the patient is visiting.
##Sample_Name - This variable refers to the procedures undergone for
each patient.
##Transcription - This output provides a full transcript of the
physician's SOAP notes.
##Keywords - This output collects keywords from the
Medical_Speciality, Sample_Name, and Transcription variables.
# Number of distinct values in the medical_specialty column.
dplyr::n_distinct(raw.data[["medical_specialty"]])
[1] 40
###Transcripts per specialty
# Horizontal bar chart: one bar per medical specialty, bar length = number of
# rows (documents) with that specialty.
ggplot2::ggplot(
  data = raw.data,
  mapping = ggplot2::aes(y = medical_specialty)
) +
  ggplot2::geom_bar() +
  ggplot2::labs(x = "Document Count", y = "Medical Speciality")

# Keep only the three specialties analysed in the rest of the notebook.
filtered.data <- dplyr::filter(
  raw.data,
  medical_specialty %in% c("Orthopedic", "Radiology", "Surgery")
)
###Text Processing
# Build a cleaned transcript per note:
#   1. split each transcription into word tokens,
#   2. strip every non-alphanumeric character from each token,
#   3. drop tokens containing a digit,
#   4. drop tidytext stop words,
#   5. paste the surviving tokens back into one string per note_id,
#   6. re-attach the remaining note metadata.
analysis.data <- filtered.data %>%
  tidytext::unnest_tokens(word, transcription) %>%
  dplyr::mutate(word = stringr::str_replace_all(word, "[^[:alnum:]]", "")) %>%
  dplyr::filter(!stringr::str_detect(word, "[0-9]")) %>%
  # Explicit key: do not rely on whichever column names happen to be shared
  # with stop_words (also silences the "Joining with ..." message).
  dplyr::anti_join(tidytext::stop_words, by = "word") %>%
  dplyr::group_by(note_id) %>%
  dplyr::summarise(transcription = paste(word, collapse = " ")) %>%
  dplyr::left_join(dplyr::select(filtered.data, -transcription), by = "note_id")
Joining with `by = join_by(word)`
# Re-tokenise the cleaned transcripts twice: once into single words
# (tokenized.data.unigram) and once into two-word n-grams (tokenized.data).
tokenized.data.unigram <- tidytext::unnest_tokens(
  analysis.data, word, transcription,
  to_lower = TRUE
)
tokenized.data <- tidytext::unnest_tokens(
  analysis.data, ngram, transcription,
  token = "ngrams", n = 2, to_lower = TRUE
)
2
###Unique Tokens per Speciality
# Count the distinct unigrams seen within each medical specialty.
tokenized.data.unigram %>%
  dplyr::distinct(medical_specialty, word) %>%
  dplyr::count(medical_specialty, name = "n")
##Unique Unigrams
##There are 7682 unique unigrams in the orthopedic speciality.
##There are 5935 unique unigrams in the radiology speciality.
##There are 11977 unique unigrams in the surgery speciality.
# Count the distinct bigrams seen within each medical specialty.
tokenized.data %>%
  dplyr::distinct(medical_specialty, ngram) %>%
  dplyr::count(medical_specialty, name = "n")
# word_counts: how often each unigram occurs, most frequent first.
word_counts <- tokenized.data.unigram %>%
  dplyr::count(word, name = "count") %>%
  dplyr::arrange(dplyr::desc(count))

# count_distribution: for every observed frequency, how many unigrams have it.
count_distribution <- dplyr::count(word_counts, count, name = "num_words")

# Scatter plot of that frequency-of-frequencies table.
ggplot2::ggplot(
  count_distribution,
  ggplot2::aes(x = count, y = num_words)
) +
  ggplot2::geom_point() +
  ggplot2::labs(
    title = "Scatter Plot of Count Distribution",
    x = "Count of Unique Words",
    y = "Number of Words"
  )

# word_counts: how often each bigram occurs, most frequent first.
word_counts <- tokenized.data %>%
  dplyr::count(ngram, name = "count") %>%
  dplyr::arrange(dplyr::desc(count))

# count_distribution: for every observed frequency, how many bigrams have it.
count_distribution <- dplyr::count(word_counts, count, name = "num_words")

# Scatter plot of that frequency-of-frequencies table.
ggplot2::ggplot(
  count_distribution,
  ggplot2::aes(x = count, y = num_words)
) +
  ggplot2::geom_point() +
  ggplot2::labs(
    title = "Scatter Plot of Count Distribution",
    x = "Count of Unique Bigrams",
    y = "Number of Words"
  )

3 ###Unique bigrams per category
# Count the distinct bigrams seen within each medical specialty.
tokenized.data %>%
  dplyr::distinct(medical_specialty, ngram) %>%
  dplyr::count(medical_specialty, name = "n")
##Unique Bigrams
##There are 55732 unique bigrams in the orthopedic specialty.
##There are 28297 unique bigrams in the radiology speciality.
##There are 130404 unique bigrams in the surgery speciality.
4
##Unique Sentences
# Rebuild analysis.data from sentence-level tokens:
#   1. split each transcription into sentences,
#   2. strip everything that is neither alphanumeric nor whitespace,
#   3. drop sentences containing a digit,
#   4. paste the remaining sentences back into one string per note_id,
#   5. re-attach the remaining note metadata.
analysis.data <- filtered.data %>%
  tidytext::unnest_tokens(sentence, transcription, token = "sentences") %>%
  dplyr::mutate(
    sentence = stringr::str_replace_all(sentence, "[^[:alnum:]\\s]", "")
  ) %>%
  dplyr::filter(!stringr::str_detect(sentence, "[0-9]")) %>%
  # No join against stop_words here: a cross join pairs every sentence with
  # every stop-word row, which filters nothing and only repeats each sentence
  # once per stop word in the pasted transcription.
  dplyr::group_by(note_id) %>%
  dplyr::summarise(transcription = paste(sentence, collapse = " ")) %>%
  dplyr::left_join(dplyr::select(filtered.data, -transcription), by = "note_id")
?cross_join
?str_detect
# Tokenise sentences from the original transcripts in filtered.data. The
# transcription column of analysis.data has had its punctuation stripped, so
# it no longer carries the sentence boundaries the tokenizer relies on.
# Each sentence then gets the same cleaning as above (strip non-alphanumeric,
# non-space characters; drop sentences containing digits; drop empties).
tokenized.data.sentence <- filtered.data %>%
  tidytext::unnest_tokens(
    ngram, transcription,
    token = "sentences", to_lower = TRUE
  ) %>%
  dplyr::mutate(
    ngram = stringr::str_replace_all(ngram, "[^[:alnum:]\\s]", "")
  ) %>%
  dplyr::filter(ngram != "", !stringr::str_detect(ngram, "[0-9]"))

# Number of distinct sentences per specialty (not the number of rows).
tokenized.data.sentence %>%
  dplyr::distinct(medical_specialty, ngram) %>%
  dplyr::count(medical_specialty, name = "n")
##Unique Sentences
##There are 350 unique sentences in the orthopedic specialty.
##There are 262 unique sentences in the radiology speciality.
##There are 1085 unique sentences in the surgery speciality.
###Words per Category
# Five most frequent bigrams within each specialty (ties at the cut-off are
# kept). slice_max() replaces the superseded top_n() and names the ordering
# column explicitly instead of relying on "last column" selection.
tokenized.data %>%
  dplyr::count(medical_specialty, ngram, sort = TRUE) %>%
  dplyr::group_by(medical_specialty) %>%
  dplyr::slice_max(order_by = n, n = 5) %>%
  dplyr::ungroup()
Selecting by n
5
##Use of a Lemmatizer
#A general purpose lemmatizer may not work well for medical data.
This is because medical data contains highly specialized terms, and a
lemmatizer must be trained on such terms to map them to their lemmas
accurately. Some specific issues include:
#a. Medical data usually contains specialized terms, drug names, and
jargon. Therefore, a general purpose tool may not have the knowledge of
these terms and may not be proficient in accurately identifying the
lemmas.
#b. Medical terms typically come from different parts of speech such
as nouns, verbs, and adjectives. Since the process of lemmatizing
requires mapping to generate correct lemmas, general purpose lemmatizers
(which have not been trained on medical data) may not process the
variations in medical speech effectively.
# Lemmatise every bigram. lemmatize_strings() lemmatises the words inside a
# multi-word string; lemmatize_words() expects one word per element.
lemmatized.data <- tokenized.data %>%
  dplyr::mutate(lemma = textstem::lemmatize_strings(ngram))

# Per-specialty relative frequency of each lemma, reshaped so that Orthopedic
# is its own column and the other two specialties are stacked in
# (medical_specialty, proportion) for faceting.
lemma.freq <- lemmatized.data %>%
  dplyr::count(medical_specialty, lemma) %>%
  dplyr::group_by(medical_specialty) %>%
  dplyr::mutate(proportion = n / sum(n)) %>%
  dplyr::ungroup() %>%
  # Drop the raw count before widening: pivot_wider() uses every remaining
  # column as a row identifier, so keeping n would put the same lemma on
  # separate rows whenever its count differs between specialties.
  dplyr::select(-n) %>%
  tidyr::pivot_wider(
    names_from = medical_specialty,
    values_from = proportion
  ) %>%
  tidyr::pivot_longer(
    c("Surgery", "Radiology"),
    names_to = "medical_specialty",
    values_to = "proportion"
  )
# Compare lemma proportions: Orthopedic on the y axis against the faceted
# specialty on the x axis, both on log10 percent scales. The dashed line is
# the default geom_abline() reference; colour encodes the absolute difference
# between the two proportions.
ggplot2::ggplot(
  lemma.freq,
  ggplot2::aes(
    x = proportion,
    y = Orthopedic,
    color = abs(Orthopedic - proportion)
  )
) +
  ggplot2::geom_abline(color = "gray40", lty = 2) +
  ggplot2::geom_jitter(alpha = 0.1, size = 2.5, width = 0.3, height = 0.3) +
  ggplot2::geom_text(
    ggplot2::aes(label = lemma),
    check_overlap = TRUE,
    vjust = 1.5
  ) +
  ggplot2::scale_x_log10(labels = scales::percent_format()) +
  ggplot2::scale_y_log10(labels = scales::percent_format()) +
  ggplot2::scale_color_gradient(
    limits = c(0, 0.001),
    low = "darkslategray4",
    high = "gray75"
  ) +
  ggplot2::facet_wrap(~medical_specialty, ncol = 2) +
  ggplot2::theme(legend.position = "none") +
  ggplot2::labs(y = "Orthopedic", x = NULL)

6
##Analyzing relative proportions
#This plot visualizes the same relative proportion of lemmas in each
speciality. Based on these specialties, I would not expect to see the
exact same relative proportions. There are some differences in the
relationship between orthopaedics and radiology, and orthopaedics and
surgery:
#Orthopaedics and surgery are more likely to have a high degree of
common lemmas. Both specialties are based on standard anatomical
terms.
#Orthopaedics and radiology are less likely to have a high degree of
common lemmas. There is definitely overlap; however, radiology may refer
more to the technique of medical imaging. Therefore, many of the
anatomical terms, diagnoses, and treatments related to orthopaedics may
be missed.
7
##Direct comparison of surgery and radiology
library(dplyr)
library(tidyr)
library(ggplot2)
library(textstem)
# Lemmatise every bigram. lemmatize_strings() lemmatises the words inside a
# multi-word string; lemmatize_words() expects one word per element.
lemmatized.data <- tokenized.data %>%
  dplyr::mutate(lemma = textstem::lemmatize_strings(ngram))

# Per-specialty relative frequency of each lemma, reshaped so that Surgery is
# its own column and the other two specialties are stacked in
# (medical_specialty, proportion) for faceting.
lemma.freq <- lemmatized.data %>%
  dplyr::count(medical_specialty, lemma) %>%
  dplyr::group_by(medical_specialty) %>%
  dplyr::mutate(proportion = n / sum(n)) %>%
  dplyr::ungroup() %>%
  # Drop the raw count before widening: pivot_wider() uses every remaining
  # column as a row identifier, so keeping n would put the same lemma on
  # separate rows whenever its count differs between specialties.
  dplyr::select(-n) %>%
  tidyr::pivot_wider(
    names_from = medical_specialty,
    values_from = proportion
  ) %>%
  tidyr::pivot_longer(
    c("Orthopedic", "Radiology"),
    names_to = "medical_specialty",
    values_to = "proportion"
  )
# Compare lemma proportions: Surgery on the y axis against the faceted
# specialty on the x axis, both on log10 percent scales. The dashed line is
# the default geom_abline() reference; colour encodes the absolute difference
# between the two proportions.
ggplot2::ggplot(
  lemma.freq,
  ggplot2::aes(
    x = proportion,
    y = Surgery,
    color = abs(Surgery - proportion)
  )
) +
  ggplot2::geom_abline(color = "gray40", lty = 2) +
  ggplot2::geom_jitter(alpha = 0.1, size = 2.5, width = 0.3, height = 0.3) +
  ggplot2::geom_text(
    ggplot2::aes(label = lemma),
    check_overlap = TRUE,
    vjust = 1.5
  ) +
  ggplot2::scale_x_log10(labels = scales::percent_format()) +
  ggplot2::scale_y_log10(labels = scales::percent_format()) +
  ggplot2::scale_color_gradient(
    limits = c(0, 0.001),
    low = "darkslategray4",
    high = "gray75"
  ) +
  ggplot2::facet_wrap(~medical_specialty, ncol = 2) +
  ggplot2::theme(legend.position = "none") +
  ggplot2::labs(y = "Surgery", x = NULL)

###TF-IDF Normalization
# lemma.counts: occurrences of each lemma within each specialty.
lemma.counts <- dplyr::count(lemmatized.data, medical_specialty, lemma)

# total.counts: total number of lemma occurrences per specialty.
total.counts <- lemma.counts %>%
  dplyr::group_by(medical_specialty) %>%
  dplyr::summarise(total = sum(n))

# Attach each specialty's total to its lemma rows. The key is spelled out so
# the join does not depend on which column names the two tables share.
all.counts <- dplyr::left_join(
  lemma.counts, total.counts,
  by = "medical_specialty"
)
Joining with `by = join_by(medical_specialty)`
# Add tf, idf and tf_idf columns, treating each lemma as a term and each
# medical specialty as a document.
all.counts.tfidf <- all.counts %>%
  tidytext::bind_tf_idf(term = lemma, document = medical_specialty, n = n)

# Ten highest tf-idf lemmas within each specialty.
all.counts.tfidf %>%
  dplyr::group_by(medical_specialty) %>%
  dplyr::slice_max(order_by = tf_idf, n = 10)
8 ##Stand out lemmas
#The lemmas that stand out in these lists are “admission”,
“diagnosis”, “chief”, and “complaint”. Orthopedics often includes
admission and diagnosis of patients based on their presenting complaint,
so a treatment modality can be selected.
# Show the first note whose cleaned transcription matches the pattern.
# NOTE(review): the pattern is a regular expression, so each '.' matches any
# character rather than a literal period - confirm that this is intended.
analysis.data %>%
  dplyr::filter(stringr::str_detect(transcription, 'b.i.d')) %>%
  dplyr::select(medical_specialty, transcription) %>%
  dplyr::slice(1)
9
##Extracting unusual top lemma
# Show the first note whose cleaned transcription contains "atv".
analysis.data %>%
  dplyr::filter(stringr::str_detect(transcription, 'atv')) %>%
  dplyr::select(medical_specialty, transcription) %>%
  dplyr::slice(1)
LS0tCnRpdGxlOiAiS2FtaW5kYSAtIFByYWN0aWNhbCAyIgpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sKLS0tCgoKYGBge3J9CmtuaXRyOjpvcHRzX2NodW5rJHNldChlY2hvID0gVFJVRSkKbGlicmFyeShyZWFkcikKbGlicmFyeShkcGx5cikKYGBgCgpgYGB7cn0KbGlicmFyeSh0aWR5cikKbGlicmFyeShnZ3Bsb3QyKQpsaWJyYXJ5KHNjYWxlcykKYGBgCgpgYGB7cn0KaW5zdGFsbC5wYWNrYWdlcygidGlkeXRleHQiKQppbnN0YWxsLnBhY2thZ2VzKCJ0ZXh0c3RlbSIpCmluc3RhbGwucGFja2FnZXMoImNsaW5zcGFjeSIpCmluc3RhbGwucGFja2FnZXMoInRvcGljbW9kZWxzIikKaW5zdGFsbC5wYWNrYWdlcygicmVzaGFwZTIiKQppbnN0YWxsLnBhY2thZ2VzKCJzdHJpbmdyIikKYGBgCgpgYGB7cn0KbGlicmFyeSh0aWR5dGV4dCkKbGlicmFyeSh0ZXh0c3RlbSkKbGlicmFyeShjbGluc3BhY3kpCmxpYnJhcnkodG9waWNtb2RlbHMpCmxpYnJhcnkocmVzaGFwZTIpCmxpYnJhcnkoc3RyaW5ncikKYGBgCgojIyNEYXRhIFBhcnNpbmcKCmBgYHtyfQpyYXcuZGF0YSA8LSBjbGluc3BhY3k6OmRhdGFzZXRfbXRzYW1wbGVzKCkKZHBseXI6OmdsaW1wc2UocmF3LmRhdGEpCmBgYAoqKjEqKiAKCiMjI0RhdGEgRGVzY3JpcHRpb24KCiMjTm90ZUlEIC0gVGhpcyB2YXJpYWJsZSBpcyB0aGUgdW5pcXVlIElEIGZvciBlYWNoIG5vdGUuCgojI0Rlc2NyaXB0aW9uIC0gVGhpcyBvdXRwdXQgcHJvdmlkZXMgYSBzdW1tYXJ5IG9mIHRoZSBTT0FQIG5vdGVzLgoKIyNNZWRpY2FsX1NwZWNpYWx0eSAtIFRoaXMgdmFyaWFibGUgcmVmZXJzIHRvIHRoZSBtZWRpY2FsIHNwZWNpYWxpdHkgdGhlIHBhdGllbnQgaXMgdmlzaXRpbmcuCgojI1NhbXBsZV9OYW1lIC0gVGhpcyB2YXJpYWJsZSByZWZlcnMgdG8gdGhlIHByb2NlZHVyZXMgdW5kZXJnb25lIGZvciBlYWNoIHBhdGllbnQuCgojI1RyYW5zY3JpcHRpb24gLSBUaGlzIG91dHB1dCBwcm92aWRlcyBhIGZ1bGwgdHJhbnNjcmlwdCBvZiB0aGUgcGh5c2ljaWFucyBTT0FQIG5vdGVzLgoKIyNLZXl3b3JkcyAtIFRoaXMgb3V0cHV0IGNvbGxlY3RzIGtleXdvcmRzIGZyb20gdGhlIE1lZGljYWxfU3BlY2lhbGl0eSwgU2FtcGxlX05hbWUsIGFuZCBUcmFuc2NyaXB0aW9uIHZhcmlhYmxlcy4KCgoKYGBge3IgcmF3ZGF0YSBtZWRpY2FsIHNwZWNpYWxpdGllc30KCnJhdy5kYXRhICU+JSBkcGx5cjo6c2VsZWN0KG1lZGljYWxfc3BlY2lhbHR5KSAlPiUgZHBseXI6Om5fZGlzdGluY3QoKQpgYGAKCiMjI1RyYW5zY3JpcHRzIHBlciBzcGVjaWFsdHkKCmBgYHtyfQpnZ3Bsb3QyOjpnZ3Bsb3QocmF3LmRhdGEsIGdncGxvdDI6OmFlcyh5PW1lZGljYWxfc3BlY2lhbHR5KSkgKyBnZ3Bsb3QyOjpnZW9tX2JhcigpICsgbGFicyh4PSJEb2N1bWVudCBDb3VudCIsIHk9Ik1lZGljYWwgU3BlY2lhbGl0eSIpCmBgYAoKYGBge3J9CmZpbHRlcmVkLmRhdGEgPC0gcmF3LmRhdGEgJT4lIGRwbHlyOjpmaWx0ZXIobWVkaWNhbF9zcGVjaWFs
dHkgJWluJSBjKCJPcnRob3BlZGljIiwgIlJhZGlvbG9neSIsICJTdXJnZXJ5IikpIApgYGAKCgojIyNUZXh0IFByb2Nlc3NpbmcKCmBgYHtyIHRleHQgcHJvY2Vzc2luZ30KCmFuYWx5c2lzLmRhdGEgPC0gZmlsdGVyZWQuZGF0YSAlPiUKICB1bm5lc3RfdG9rZW5zKHdvcmQsIHRyYW5zY3JpcHRpb24pICU+JQogIG11dGF0ZSh3b3JkID0gc3RyX3JlcGxhY2VfYWxsKHdvcmQsICJbXls6YWxudW06XV0iLCAiIikpICU+JQogIGZpbHRlcighc3RyX2RldGVjdCh3b3JkLCAiWzAtOV0iKSkgJT4lCiAgYW50aV9qb2luKHN0b3Bfd29yZHMpICU+JQogIGdyb3VwX2J5KG5vdGVfaWQpICU+JQogIHN1bW1hcmlzZSh0cmFuc2NyaXB0aW9uID0gcGFzdGUod29yZCwgY29sbGFwc2UgPSAiICIpKSAlPiUKICBsZWZ0X2pvaW4oc2VsZWN0KGZpbHRlcmVkLmRhdGEsIC10cmFuc2NyaXB0aW9uKSwgYnkgPSAibm90ZV9pZCIpCmBgYAoKCmBgYHtyfQp0b2tlbml6ZWQuZGF0YS51bmlncmFtIDwtIGFuYWx5c2lzLmRhdGEgJT4lIHRpZHl0ZXh0Ojp1bm5lc3RfdG9rZW5zKHdvcmQsIHRyYW5zY3JpcHRpb24sIHRvX2xvd2VyPVRSVUUpCmBgYAoKYGBge3J9CnRva2VuaXplZC5kYXRhIDwtIGFuYWx5c2lzLmRhdGEgJT4lIHRpZHl0ZXh0Ojp1bm5lc3RfdG9rZW5zKG5ncmFtLCB0cmFuc2NyaXB0aW9uLCB0b2tlbiA9ICJuZ3JhbXMiLCBuPTIsIHRvX2xvd2VyID0gVFJVRSkKYGBgCgoqKjIqKgoKIyMjVW5pcXVlIFRva2VucyBwZXIgU3BlY2lhbGl0eQoKYGBge3IgdG9rZW4gdW5pZ3JhbX0KdG9rZW5pemVkLmRhdGEudW5pZ3JhbSAlPiUgZHBseXI6Omdyb3VwX2J5KG1lZGljYWxfc3BlY2lhbHR5KSAlPiUgZHBseXI6OmRpc3RpbmN0KHdvcmQpICU+JSBkcGx5cjo6c3VtbWFyaXNlKG49ZHBseXI6Om4oKSkKYGBgCiMjVW5pcXVlIFVuaWdyYW1zCgojI1RoZXJlIGFyZSA3NjgyIHVuaXF1ZSB1bmlncmFtcyBpbiB0aGUgb3J0aG9wZWRpYyBzcGVjaWFsaXR5LgoKIyNUaGVyZSBhcmUgNTkzNSB1bmlxdWUgdW5pZ3JhbXMgaW4gdGhlIHJhZGlvbG9neSBzcGVjaWFsaXR5LgoKIyNUaGVyZSBhcmUgMTE5NzcgdW5pcXVlIHVuaWdyYW1zIGluIHRoZSBzdXJnZXJ5IHNwZWNpYWxpdHkuCgoKYGBge3IgdG9rZW4gYmlncmFtfQoKdG9rZW5pemVkLmRhdGEgJT4lIGRwbHlyOjpncm91cF9ieShtZWRpY2FsX3NwZWNpYWx0eSkgJT4lIGRwbHlyOjpkaXN0aW5jdChuZ3JhbSkgJT4lIGRwbHlyOjpzdW1tYXJpc2Uobj1kcGx5cjo6bigpKQpgYGAKCgpgYGB7ciB1bmlncmFtIHRva2VuIGRpc3RyaWJ1dGlvbn0KCndvcmRfY291bnRzIDwtIHRva2VuaXplZC5kYXRhLnVuaWdyYW0gJT4lCiAgICBncm91cF9ieSh3b3JkKSAlPiUKICAgIHN1bW1hcmlzZShjb3VudCA9IG4oKSkgJT4lCiAgICB1bmdyb3VwKCkgJT4lCiAgICBhcnJhbmdlKGRlc2MoY291bnQpKQoKY291bnRfZGlzdHJpYnV0aW9uIDwtIHdvcmRfY291bnRzICU+JQogIGdyb3VwX2J5KGNvdW50KSAlPiUKICBzdW1tYXJp
c2UobnVtX3dvcmRzID0gbigpKSAlPiUKICB1bmdyb3VwKCkKIAogZ2dwbG90Mjo6Z2dwbG90KGNvdW50X2Rpc3RyaWJ1dGlvbiwgYWVzKHggPSBjb3VudCwgeSA9IG51bV93b3JkcykpICsKICBnZW9tX3BvaW50KCkgKwogIGxhYnModGl0bGUgPSAiU2NhdHRlciBQbG90IG9mIENvdW50IERpc3RyaWJ1dGlvbiIsCiAgICAgICB4ID0gIkNvdW50IG9mIFVuaXF1ZSBXb3JkcyIsCiAgICAgICB5ID0gIk51bWJlciBvZiBXb3JkcyIpCmBgYAoKYGBge3IgYmlncmFtIHRva2VuIGRpc3RyaWJ1dGlvbn0Kd29yZF9jb3VudHMgPC0gdG9rZW5pemVkLmRhdGEgJT4lCiAgICBncm91cF9ieShuZ3JhbSkgJT4lCiAgICBzdW1tYXJpc2UoY291bnQgPSBuKCkpICU+JQogICAgdW5ncm91cCgpICU+JQogICAgYXJyYW5nZShkZXNjKGNvdW50KSkKCmNvdW50X2Rpc3RyaWJ1dGlvbiA8LSB3b3JkX2NvdW50cyAlPiUKICBncm91cF9ieShjb3VudCkgJT4lCiAgc3VtbWFyaXNlKG51bV93b3JkcyA9IG4oKSkgJT4lCiAgdW5ncm91cCgpCiAKIGdncGxvdDI6OmdncGxvdChjb3VudF9kaXN0cmlidXRpb24sIGFlcyh4ID0gY291bnQsIHkgPSBudW1fd29yZHMpKSArCiAgZ2VvbV9wb2ludCgpICsKICBsYWJzKHRpdGxlID0gIlNjYXR0ZXIgUGxvdCBvZiBDb3VudCBEaXN0cmlidXRpb24iLAogICAgICAgeCA9ICJDb3VudCBvZiBVbmlxdWUgQmlncmFtcyIsCiAgICAgICB5ID0gIk51bWJlciBvZiBXb3JkcyIpCmBgYAoKKiozKioKIyMjVW5pcXVlIGJpZ3JhbXMgcGVyIGNhdGVnb3J5CgoKYGBge3IgdG9rZW4gYmlncmFtMn0KCnRva2VuaXplZC5kYXRhICU+JSBkcGx5cjo6Z3JvdXBfYnkobWVkaWNhbF9zcGVjaWFsdHkpICU+JSBkcGx5cjo6ZGlzdGluY3QobmdyYW0pICU+JSBkcGx5cjo6c3VtbWFyaXNlKG49ZHBseXI6Om4oKSkKYGBgCgoKIyNVbmlxdWUgQmlncmFtcwoKIyNUaGVyZSBhcmUgNTU3MzIgdW5pcXVlIGJpZ3JhbXMgaW4gdGhlIG9ydGhvcGVkaWMgc3BlY2lhbHR5LgoKIyNUaGVyZSBhcmUgMjgyOTcgdW5pcXVlIGJpZ3JhbXMgaW4gdGhlIHJhZGlvbG9neSBzcGVjaWFsaXR5LgoKIyNUaGVyZSBhcmUgMTMwNDA0IHVuaXF1ZSBiaWdyYW1zIGluIHRoZSBzdXJnZXkgc3BlY2lhbGl0eS4KCgoqKjQqKgoKIyNVbmlxdWUgU2VudGVuY2VzCgpgYGB7ciBzZW50ZW5jZXN9CmFuYWx5c2lzLmRhdGEgPC0gZmlsdGVyZWQuZGF0YSAlPiUKICB1bm5lc3RfdG9rZW5zKHNlbnRlbmNlLCB0cmFuc2NyaXB0aW9uLCB0b2tlbiA9ICJzZW50ZW5jZXMiKSAlPiUKICBtdXRhdGUoc2VudGVuY2UgPSBzdHJfcmVwbGFjZV9hbGwoc2VudGVuY2UsICJbXls6YWxudW06XVxcc10iLCAiIikpICU+JQogIGZpbHRlcighc3RyX2RldGVjdChzZW50ZW5jZSwgIlswLTldIikpICU+JQogIGNyb3NzX2pvaW4oc3RvcF93b3JkcykgJT4lCiAgZ3JvdXBfYnkobm90ZV9pZCkgJT4lCiAgc3VtbWFyaXNlKHRyYW5zY3JpcHRpb24gPSBwYXN0ZShzZW50ZW5jZSwgY29sbGFwc2UgPSAiICIp
KSAlPiUKICBsZWZ0X2pvaW4oc2VsZWN0KGZpbHRlcmVkLmRhdGEsIC10cmFuc2NyaXB0aW9uKSwgYnkgPSAibm90ZV9pZCIpCmBgYAoKYGBge3J9Cj9jcm9zc19qb2luCmBgYAoKYGBge3J9Cj9zdHJfZGV0ZWN0CmBgYAoKYGBge3J9CnRva2VuaXplZC5kYXRhLnNlbnRlbmNlIDwtIGFuYWx5c2lzLmRhdGEgJT4lIHRpZHl0ZXh0Ojp1bm5lc3RfdG9rZW5zKG5ncmFtLCB0cmFuc2NyaXB0aW9uLCB0b2tlbiA9ICJzZW50ZW5jZXMiLCB0b19sb3dlciA9IFRSVUUpCmBgYAoKYGBge3J9CnRva2VuaXplZC5kYXRhLnNlbnRlbmNlICU+JQogIGRwbHlyOjpncm91cF9ieShtZWRpY2FsX3NwZWNpYWx0eSkgJT4lCiAgZHBseXI6OmNvdW50KG5hbWUgPSAibiIpICU+JQogIGRwbHlyOjp1bmdyb3VwKCkKYGBgCgojI1VuaXF1ZSBTZW50ZW5jZXMKCiMjVGhlcmUgYXJlIDM1MCB1bmlxdWUgYmlncmFtcyBpbiB0aGUgb3J0aG9wZWRpYyBzcGVjaWFsdHkuCgojI1RoZXJlIGFyZSAyNjIgdW5pcXVlIGJpZ3JhbXMgaW4gdGhlIHJhZGlvbG9neSBzcGVjaWFsaXR5LgoKIyNUaGVyZSBhcmUgMTA4NSB1bmlxdWUgYmlncmFtcyBpbiB0aGUgc3VyZ2V5IHNwZWNpYWxpdHkuCgojIyNXb3JkcyBwZXIgQ2F0ZWdvcnkKCmBgYHtyfQp0b2tlbml6ZWQuZGF0YSAlPiUKICBkcGx5cjo6Z3JvdXBfYnkobWVkaWNhbF9zcGVjaWFsdHkpICU+JQogIGRwbHlyOjpjb3VudChuZ3JhbSwgc29ydCA9IFRSVUUpICU+JQogIGRwbHlyOjp0b3Bfbig1KQpgYGAKCioqNSoqIAoKIyNVc2Ugb2YgYSBMZW1tYXRpemVyCgojQSBnZW5lcmFsIHB1cnBvc2UgbGVtbWF0aXplciBtYXkgbm90IHdvcmsgd2VsbCBmb3IgbWVkaWNhbCBkYXRhLiBUaGlzIGlzIGJlY2F1c2UgbWVkaWNhbCBkYXRhIGNvbnRhaW5zIGhpZ2hseSBzcGVjaWFsaXplZCB0ZXJtcyB0aGF0IHJlcXVpcmUgYWNjdXJhdGVseSB0cmFpbmVkIG1ldGhvZHMgdG8gYmUgdHJhaW5lZCB0byBhY2N1cmF0ZWx5IHRva2VuIHRlcm1zLiBTb21lIHNwZWNpZmljIGlzc3VlcyBpbmNsdWRlOgoKI2EuIE1lZGljYWwgZGF0YSB1c3VhbGx5IGNvbnRhaW5zIHNwZWNpYWxpemVkIHRlcm1zLCBkcnVncyBuYW1lcywgYW5kIGphcmdvbi4gVGhlcmVmb3JlLCBhIGdlbmVyYWwgcHVycG9zZSB0b29sIG1heSBub3QgaGF2ZSB0aGUga25vd2xlZGdlIG9mIHRoZXNlIHRlcm1zIGFuZCBtYXkgbm90IGJlIHByb2ZpY2llbnQgaW4gYWNjdXJhdGVseSBpZGVudGZ5aW5nIHRoZSBsZW1tYXMuCgojYi4gTWVkaWNhbCB0ZXJtcyB0eXBpY2FsbHkgY29tZSBmcm9tIGRpZmZlcmVudCBwYXJ0cyBvZiBzcGVlY2ggc3VjaCBhcyBub3VucywgdmVyYnMsIGFuZCBhZGplY3RpdmVzLiBTaW5jZSB0aGUgcHJvY2VzcyBvZiBsZW1tYXRpemluZyByZXF1aXJlcyBtYXBwaW5nIHRvIGdlbmVyYXRlIGNvcnJlY3QgbGVtbWFzLCBnZW5lcmFsIHB1cnBvc2UgbGVtbWFzICh3aGljaCBoYXZlIG5vdCBiZWVuIHRyYWluZWQgb24gbWVkaWNhbCBkYXRhKSBtYXkgbm90IHBy
b2Nlc3MgdGhlIHZhcmlhdGlvbnMgaW4gbWVkaWNhbCBzcGVlY2ggZWZmZWN0aXZlbHkuCgoKYGBge3IgbGVtbWF0aXplcn0KbGVtbWF0aXplZC5kYXRhIDwtIHRva2VuaXplZC5kYXRhICU+JSBkcGx5cjo6bXV0YXRlKGxlbW1hPXRleHRzdGVtOjpsZW1tYXRpemVfd29yZHMobmdyYW0pKQpgYGAKCgpgYGB7cn0KbGVtbWEuZnJlcSA8LSBsZW1tYXRpemVkLmRhdGEgJT4lIAogIGRwbHlyOjpjb3VudChtZWRpY2FsX3NwZWNpYWx0eSwgbGVtbWEpICU+JQogIGRwbHlyOjpncm91cF9ieShtZWRpY2FsX3NwZWNpYWx0eSkgJT4lIAogIGRwbHlyOjptdXRhdGUocHJvcG9ydGlvbiA9IG4gLyBzdW0obikpICU+JQogIHRpZHlyOjpwaXZvdF93aWRlcihuYW1lc19mcm9tID0gbWVkaWNhbF9zcGVjaWFsdHksIHZhbHVlc19mcm9tID0gcHJvcG9ydGlvbikgJT4lCiAgdGlkeXI6OnBpdm90X2xvbmdlcihgU3VyZ2VyeWA6YFJhZGlvbG9neWAsCiAgICAgICAgICAgICAgIG5hbWVzX3RvID0gIm1lZGljYWxfc3BlY2lhbHR5IiwgdmFsdWVzX3RvID0gInByb3BvcnRpb24iKQpgYGAKCgpgYGB7cn0KZ2dwbG90Mjo6Z2dwbG90KGxlbW1hLmZyZXEsIGdncGxvdDI6OmFlcyh4PXByb3BvcnRpb24sIAogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIHk9YE9ydGhvcGVkaWNgLAogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIGNvbG9yPWFicyhgT3J0aG9wZWRpY2AgLSBwcm9wb3J0aW9uKSkpICsgCiAgZ2dwbG90Mjo6Z2VvbV9hYmxpbmUoY29sb3I9ImdyYXk0MCIsIGx0eT0yKSArCiAgZ2dwbG90Mjo6Z2VvbV9qaXR0ZXIoYWxwaGE9MC4xLCBzaXplPTIuNSwgd2lkdGg9MC4zLCBoZWlnaHQ9MC4zKSArCiAgZ2dwbG90Mjo6Z2VvbV90ZXh0KGdncGxvdDI6OmFlcyhsYWJlbD1sZW1tYSksIGNoZWNrX292ZXJsYXA9VFJVRSwgdmp1c3Q9MS41KSArCiAgZ2dwbG90Mjo6c2NhbGVfeF9sb2cxMChsYWJlbHM9c2NhbGVzOjpwZXJjZW50X2Zvcm1hdCgpKSArIAogIGdncGxvdDI6OnNjYWxlX3lfbG9nMTAobGFiZWxzPXNjYWxlczo6cGVyY2VudF9mb3JtYXQoKSkgKyAKICBnZ3Bsb3QyOjpzY2FsZV9jb2xvcl9ncmFkaWVudChsaW1pdHM9YygwLCAwLjAwMSksIGxvdz0iZGFya3NsYXRlZ3JheTQiLCBoaWdoPSJncmF5NzUiKSArCiAgZ2dwbG90Mjo6ZmFjZXRfd3JhcCh+bWVkaWNhbF9zcGVjaWFsdHksIG5jb2wgPSAyKSArCiAgZ2dwbG90Mjo6dGhlbWUobGVnZW5kLnBvc2l0aW9uPSJub25lIikgKwogIGdncGxvdDI6OiBsYWJzKHk9Ik9ydGhvcGVkaWMiLCB4ID0gTlVMTCkKYGBgCgoqKjYqKgoKIyNBbmFseXppbmcgcmVsYXRpdmUgcHJvcG9ydGlvbnMgCgojVGhpcyBwbG90IHZpc3VhbGl6ZXMgdGhlIHNhbWUgcmVsYXRpdmUgcHJvcG9ydGlvbiBvZiBsZW1tYXMgaW4gZWFjaCBzcGVjaWFsaXR5LiBCYXNlZCBvbiB0aGVzZSBzcGVjaWFsdGllcywgSSB3b3VsZCBub3QgZXhwZWN0IHRvIHNlZSB0aGUgZXhhY3Qgc2Ft
ZSByZWxhdGl2ZSBwcm9wb3J0aW9ucy4gVGhlcmUgYXJlIHNvbWUgZGlmZmVyZW5jZXMgaW4gdGhlIHJlbGF0aW9uc2hpcCBiZXR3ZWVuIG9ydGhvcGFlZGljcyBhbmQgcmFkaW9sb2d5LCBhbmQgb3J0aG9wYWVkaWNzIGFuZCBzdXJnZXJ5OgoKI09ydGhvcGFlZGljcyBhbmQgc3VyZ2VyeSBhcmUgbW9yZSBsaWtlbHkgdG8gaGF2ZSBhIGhpZ2ggZGVncmVlIG9mIGNvbW1vbiBsZW1tYXMuIEJvdGggc3BlY2lhbHRpZXMgYXJlIGJhc2VkIG9uIHN0YW5kYXJkIGFuYXRvbWljYWwgdGVybXMuCgojT3J0aG9wYWVkaWNzIGFuZCByYWRpb2xvZ3kgYXJlIGxlc3MgbGlrZWx5IHRvIGhhdmUgYSBoaWdoIGRlZ3JlZSBvZiBjb21tb24gbGVtbWFzLiBUaGVyZSBpcyBkZWZpbml0ZWx5IG92ZXJsYXAgaG93ZXZlciwgcmFkaW9sb2d5IG1heSByZWZlciBtb3JlIHRvIHRoZSB0ZWNobmlxdWUgb2YgbWVkaWNhbCBpbWFnaW5nLiBUaGVyZWZvcmUsIG1hbnkgb2YgdGhlIGFuYXRvbWljYWwgdGVybXMsIGRpYWdub3NlcywgYW5kIHRyZWF0bWVudHMgcmVsYXRlZCB0byBvcnRob3BhZWRpY3MgbWF5IGJlIG1pc3NlZC4KCioqNyoqCgojI0RpcmVjdCBjb21wYXJpc29uIG9mIHN1cmdlcnkgYW5kIHJhZGlvbG9neQoKYGBge3J9CmxpYnJhcnkoZHBseXIpCmxpYnJhcnkodGlkeXIpCmxpYnJhcnkoZ2dwbG90MikKbGlicmFyeSh0ZXh0c3RlbSkKYGBgCgoKYGBge3J9CmxlbW1hdGl6ZWQuZGF0YSA8LSB0b2tlbml6ZWQuZGF0YSAlPiUgZHBseXI6Om11dGF0ZShsZW1tYT10ZXh0c3RlbTo6bGVtbWF0aXplX3dvcmRzKG5ncmFtKSkKYGBgCgoKYGBge3J9CmxlbW1hLmZyZXEgPC0gbGVtbWF0aXplZC5kYXRhICU+JSAKICBkcGx5cjo6Y291bnQobWVkaWNhbF9zcGVjaWFsdHksIGxlbW1hKSAlPiUKICBkcGx5cjo6Z3JvdXBfYnkobWVkaWNhbF9zcGVjaWFsdHkpICU+JSAKICBkcGx5cjo6bXV0YXRlKHByb3BvcnRpb24gPSBuIC8gc3VtKG4pKSAlPiUKICB0aWR5cjo6cGl2b3Rfd2lkZXIobmFtZXNfZnJvbSA9IG1lZGljYWxfc3BlY2lhbHR5LCB2YWx1ZXNfZnJvbSA9IHByb3BvcnRpb24pICU+JQogIHRpZHlyOjpwaXZvdF9sb25nZXIoYE9ydGhvcGVkaWNgOmBSYWRpb2xvZ3lgLAogICAgICAgICAgICAgICBuYW1lc190byA9ICJtZWRpY2FsX3NwZWNpYWx0eSIsIHZhbHVlc190byA9ICJwcm9wb3J0aW9uIikKYGBgCgpgYGB7cn0KZ2dwbG90Mjo6Z2dwbG90KGxlbW1hLmZyZXEsIGdncGxvdDI6OmFlcyh4PXByb3BvcnRpb24sIAogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIHk9YFN1cmdlcnlgLAogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIGNvbG9yPWFicyhgU3VyZ2VyeWAgLSBwcm9wb3J0aW9uKSkpICsgCiAgZ2dwbG90Mjo6Z2VvbV9hYmxpbmUoY29sb3I9ImdyYXk0MCIsIGx0eT0yKSArCiAgZ2dwbG90Mjo6Z2VvbV9qaXR0ZXIoYWxwaGE9MC4xLCBzaXplPTIuNSwgd2lkdGg9MC4zLCBoZWlnaHQ9MC4zKSAr
CiAgZ2dwbG90Mjo6Z2VvbV90ZXh0KGdncGxvdDI6OmFlcyhsYWJlbD1sZW1tYSksIGNoZWNrX292ZXJsYXA9VFJVRSwgdmp1c3Q9MS41KSArCiAgZ2dwbG90Mjo6c2NhbGVfeF9sb2cxMChsYWJlbHM9c2NhbGVzOjpwZXJjZW50X2Zvcm1hdCgpKSArIAogIGdncGxvdDI6OnNjYWxlX3lfbG9nMTAobGFiZWxzPXNjYWxlczo6cGVyY2VudF9mb3JtYXQoKSkgKyAKICBnZ3Bsb3QyOjpzY2FsZV9jb2xvcl9ncmFkaWVudChsaW1pdHM9YygwLCAwLjAwMSksIGxvdz0iZGFya3NsYXRlZ3JheTQiLCBoaWdoPSJncmF5NzUiKSArCiAgZ2dwbG90Mjo6ZmFjZXRfd3JhcCh+bWVkaWNhbF9zcGVjaWFsdHksIG5jb2wgPSAyKSArCiAgZ2dwbG90Mjo6dGhlbWUobGVnZW5kLnBvc2l0aW9uPSJub25lIikgKwogIGdncGxvdDI6OiBsYWJzKHk9IlN1cmdlcnkiLCB4ID0gTlVMTCkKYGBgCgojIyNURi1JREYgTm9ybWFpbGl6YXRpb24KCmBgYHtyIGxlbW1hIGNvdW50c30KbGVtbWEuY291bnRzIDwtIGxlbW1hdGl6ZWQuZGF0YSAlPiUgZHBseXI6OmNvdW50KG1lZGljYWxfc3BlY2lhbHR5LCBsZW1tYSkKdG90YWwuY291bnRzIDwtIGxlbW1hLmNvdW50cyAlPiUgCiAgICAgICAgICAgICAgICAgICAgICBkcGx5cjo6Z3JvdXBfYnkobWVkaWNhbF9zcGVjaWFsdHkpICU+JSAKICAgICAgICAgICAgICAgICAgICAgIGRwbHlyOjpzdW1tYXJpc2UodG90YWw9c3VtKG4pKQoKYWxsLmNvdW50cyA8LSBkcGx5cjo6bGVmdF9qb2luKGxlbW1hLmNvdW50cywgdG90YWwuY291bnRzKQpgYGAKCmBgYHtyfQphbGwuY291bnRzLnRmaWRmIDwtIHRpZHl0ZXh0OjpiaW5kX3RmX2lkZihhbGwuY291bnRzLCBsZW1tYSwgbWVkaWNhbF9zcGVjaWFsdHksIG4pIApgYGAKCmBgYHtyfQphbGwuY291bnRzLnRmaWRmICU+JSBkcGx5cjo6Z3JvdXBfYnkobWVkaWNhbF9zcGVjaWFsdHkpICU+JSBkcGx5cjo6c2xpY2VfbWF4KG9yZGVyX2J5PXRmX2lkZiwgbj0xMCkKYGBgCgoqKjgqKgojI1N0YW5kIG91dCBsZW1tYXMKCiNUaGUgbGVtbWFzIHRoYXQgc3RhbmQgb3V0IGluIHRoZXNlIGxpc3RzIGFyZSAiYWRtaXNzaW9uIiwgImRpYWdub3NpcyIsICJjaGllZiIsIGFuZCAiY29tcGxhaW50LiBPcnRob3BlZGljcyBvZnRlbiBpbmNsdWRlcyBhZG1pc3Npb24gYW5kIGRpYWdub3NpcyBvZiBwYXRpZW50cyBiYXNlZCBvbiB0aGVpciBwcmVzZW50aW5nIGNvbXBsYWludCwgc28gYSB0cmVhdG1lbnQgbW9kYWxsaXR5IGNhbiBiZSBzZWxlY3RlZC4KCgpgYGB7cn0KYW5hbHlzaXMuZGF0YSAlPiUgZHBseXI6OnNlbGVjdChtZWRpY2FsX3NwZWNpYWx0eSwgdHJhbnNjcmlwdGlvbikgJT4lIGRwbHlyOjpmaWx0ZXIoc3RyaW5ncjo6c3RyX2RldGVjdCh0cmFuc2NyaXB0aW9uLCAnYi5pLmQnKSkgJT4lIGRwbHlyOjpzbGljZSgxKQpgYGAKKio5KioKCiMjRXh0cmFjdGluZyB1bnVzdWFsIHRvcCBsZW1tYQoKCmBgYHtyfQphbmFseXNpcy5kYXRhICU+JSBkcGx5cjo6c2VsZWN0KG1lZGljYWxfc3BlY2lh
bHR5LCB0cmFuc2NyaXB0aW9uKSAlPiUgZHBseXI6OmZpbHRlcihzdHJpbmdyOjpzdHJfZGV0ZWN0KHRyYW5zY3JpcHRpb24sICdhdHYnKSkgJT4lIGRwbHlyOjpzbGljZSgxKQpgYGAKCg==